# Read the vaccination records from the CSV file and preview the first rows
gsk_data <- read.csv(file = "gsk_covid_data.csv")
head(x = gsk_data)
##   YearWeekISO ReportingCountry Denominator NumberDosesReceived
## 1    2020-W53               AT     8901064                   0
## 2    2020-W53               AT     8901064                   0
## 3    2020-W53               AT     8901064               61425
## 4    2020-W53               AT     8901064                   0
## 5    2020-W53               AT     8901064                   0
## 6    2020-W53               AT     8901064                   0
##   NumberDosesExported FirstDose FirstDoseRefused SecondDose DoseAdditional1
## 1                   0         0               NA          0               0
## 2                   0         0               NA          0               0
## 3                   0      5314               NA          0               0
## 4                   0         0               NA          0               0
## 5                   0         0               NA          0               0
## 6                   0         3               NA          0               0
##   UnknownDose Region TargetGroup Vaccine Population
## 1           0     AT         ALL      AZ    8901064
## 2           0     AT         ALL   JANSS    8901064
## 3           0     AT         ALL     COM    8901064
## 4           0     AT         ALL    NVXD    8901064
## 5           0     AT         ALL     MOD    8901064
## 6           0     AT         ALL     UNK    8901064
# Turn each ISO year-week label into a calendar date: the "-1" suffix asks
# ISOweek2date() for weekday 1 of that week
gsk_data[["Date"]] <- ISOweek::ISOweek2date(
  paste(gsk_data$YearWeekISO, "1", sep = "-")
)

# Collapse the detailed TargetGroup codes into a few reporting groups.
# case_when() checks the conditions top to bottom and uses the first match.
# %in% never yields NA, so a row with a missing TargetGroup falls through to
# "Not known" instead of becoming NA as it did with nested ifelse() and ==.
# NOTE(review): "ALL" is filed under "18-59", exactly as in the original
# mapping -- confirm this is intended, since the code looks like a total.
# NOTE(review): the label "LTC residencts" is misspelled; it is left unchanged
# here because other code or saved output may match on the exact text.
gsk_data <- gsk_data %>%
  mutate(
    Groups = case_when(
      TargetGroup %in% c(
        "Age<18", "Age0_4", "Age5_9", "Age10_14", "Age15_17"
      ) ~ "Under 18",
      TargetGroup %in% c(
        "Age18_24", "Age25_49", "Age50_59", "ALL", "1_Age<60"
      ) ~ "18-59",
      TargetGroup %in% c(
        "Age60_69", "Age70_79", "Age80+", "1_Age60+"
      ) ~ "60+",
      TargetGroup == "LTCF" ~ "LTC residencts",
      TargetGroup == "HCW" ~ "Healthcare workers",
      TRUE ~ "Not known"
    )
  )

# Convert countries ISO codes to country names.
# The data uses "EL" for Greece, which countrycode() cannot match as an
# "iso2c" code: on its own it leaves those rows NA and warns "Some values
# were not matched unambiguously: EL". custom_match supplies that single
# mapping up front, so no warning is raised and no follow-up patch is needed.
gsk_data$Country <- countrycode(
  gsk_data$ReportingCountry,
  origin = "iso2c",
  destination = "country.name",
  custom_match = c(EL = "Greece")
)


# Create new variables per country: running dose totals and a day counter.
#  * Rows are sorted by Country and Date first, so cumsum() accumulates in
#    chronological order regardless of the row order in the CSV.
#  * Both dose columns are converted with as.numeric() so the running totals
#    are doubles and cannot hit integer overflow.
#  * days is counted from the country's earliest Date (min), which does not
#    depend on which row happens to come first.
# The result stays grouped by Country, as before.
# NOTE(review): the running totals accumulate over every row of a country
# (all vaccines and all target groups); confirm the target groups do not
# overlap, otherwise the totals count the same doses more than once.
country_gsk <- gsk_data %>%
  arrange(Country, Date) %>%
  group_by(Country) %>%
  mutate(
    cum_FirstDoses = cumsum(as.numeric(FirstDose)),
    cum_SecondDoses = cumsum(as.numeric(SecondDose)),
    days = Date - min(Date) + 1
  )


# Aggregate at world level.
# A country has many rows per Date (e.g. one per vaccine), and each row
# carries its own running total. Summing all of them would count the same
# cumulative figure many times. Instead, keep the highest running total each
# country reached on a Date, then add the countries together.
# NOTE(review): max() picks the end-of-week value only if the cum_* columns
# were accumulated in date order over non-negative doses -- confirm.
world <- country_gsk %>%
  group_by(Country, Date) %>%
  summarise(
    cum_FirstDoses = max(cum_FirstDoses),
    cum_SecondDoses = max(cum_SecondDoses),
    .groups = "drop"
  ) %>%
  group_by(Date) %>%
  summarise(
    People_vaccinated = sum(cum_FirstDoses),
    Fully_vaccinated = sum(cum_SecondDoses)
  )
# 
# SUMMARY STATISTICS
# Column-by-column overview of the enriched data set
country_gsk %>%
  summary()
##  YearWeekISO        ReportingCountry    Denominator       NumberDosesReceived
##  Length:250142      Length:250142      Min.   :     968   Min.   :      0    
##  Class :character   Class :character   1st Qu.:   94884   1st Qu.:      0    
##  Mode  :character   Mode  :character   Median :  265997   Median :      0    
##                                        Mean   : 1730981   Mean   :  48778    
##                                        3rd Qu.: 1051166   3rd Qu.:      0    
##                                        Max.   :83166711   Max.   :8956098    
##                                        NA's   :103115     NA's   :210596     
##  NumberDosesExported   FirstDose       FirstDoseRefused   SecondDose     
##  Min.   :      0     Min.   :      0   Min.   : 0.0     Min.   :      0  
##  1st Qu.:      0     1st Qu.:      0   1st Qu.: 0.0     1st Qu.:      0  
##  Median :      0     Median :      1   Median : 0.0     Median :      0  
##  Mean   :   3957     Mean   :   3635   Mean   : 0.4     Mean   :   3292  
##  3rd Qu.:      0     3rd Qu.:    158   3rd Qu.: 0.0     3rd Qu.:     65  
##  Max.   :4100000     Max.   :3374693   Max.   :73.0     Max.   :3067087  
##  NA's   :221219                        NA's   :248701                    
##  DoseAdditional1    UnknownDose          Region          TargetGroup       
##  Min.   :      0   Min.   :     0.0   Length:250142      Length:250142     
##  1st Qu.:      0   1st Qu.:     0.0   Class :character   Class :character  
##  Median :      0   Median :     0.0   Mode  :character   Mode  :character  
##  Mean   :   2551   Mean   :    27.2                                        
##  3rd Qu.:      0   3rd Qu.:     0.0                                        
##  Max.   :5538935   Max.   :429061.0                                        
##                                                                            
##    Vaccine            Population            Date               Groups         
##  Length:250142      Min.   :   38747   Min.   :2020-12-07   Length:250142     
##  Class :character   1st Qu.: 5525292   1st Qu.:2021-04-19   Class :character  
##  Mode  :character   Median : 5525292   Median :2021-08-09   Mode  :character  
##                     Mean   :18372430   Mean   :2021-08-05                     
##                     3rd Qu.:37958138   3rd Qu.:2021-11-22                     
##                     Max.   :83166711   Max.   :2022-03-07                     
##                                                                               
##    Country          cum_FirstDoses      cum_SecondDoses         days         
##  Length:250142      Min.   :        0   Min.   :        0   Length:250142    
##  Class :character   1st Qu.:  4447587   1st Qu.:  1217460   Class :difftime  
##  Mode  :character   Median : 20956910   Median : 11596665   Mode  :numeric   
##                     Mean   : 32060081   Mean   : 25022425                    
##                     3rd Qu.: 28023967   3rd Qu.: 25187945                    
##                     Max.   :209552173   Max.   :203202213                    
## 
# Distribution of the per-record first-dose counts within each country
by(
  data = country_gsk$FirstDose,
  INDICES = country_gsk$Country,
  FUN = summary
)
## country_gsk$Country: Austria
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##      0.0      0.0      2.5   3001.3    289.0 280835.0 
## ------------------------------------------------------------ 
## country_gsk$Country: Belgium
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0      95    6976    1518  537071 
## ------------------------------------------------------------ 
## country_gsk$Country: Bulgaria
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       1     128    1723    1123   68678 
## ------------------------------------------------------------ 
## country_gsk$Country: Croatia
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       4    1038     191  150275 
## ------------------------------------------------------------ 
## country_gsk$Country: Cyprus
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     1.0    18.0   579.9   190.2 34973.0 
## ------------------------------------------------------------ 
## country_gsk$Country: Czechia
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0      21     326    5759    2624  396859 
## ------------------------------------------------------------ 
## country_gsk$Country: Denmark
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##      0.0      1.0     15.0   3947.6    490.5 284062.0 
## ------------------------------------------------------------ 
## country_gsk$Country: Estonia
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     2.0    53.5   758.9   373.2 29289.0 
## ------------------------------------------------------------ 
## country_gsk$Country: Finland
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##      0.0      0.0      0.0    268.3      2.0 197521.0 
## ------------------------------------------------------------ 
## country_gsk$Country: France
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0     228    1409   20140    8959 2307972 
## ------------------------------------------------------------ 
## country_gsk$Country: Germany
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       8   22668  132439  344094  413201 3374693 
## ------------------------------------------------------------ 
## country_gsk$Country: Greece
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##      0.0      0.0      1.0   4589.5    632.5 321731.0 
## ------------------------------------------------------------ 
## country_gsk$Country: Hungary
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       1      58    3536     836  322903 
## ------------------------------------------------------------ 
## country_gsk$Country: Iceland
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       0     210       9   20893 
## ------------------------------------------------------------ 
## country_gsk$Country: Ireland
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       2      27    3791     750  162696 
## ------------------------------------------------------------ 
## country_gsk$Country: Italy
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0      77    1992   23286   13104 2532493 
## ------------------------------------------------------------ 
## country_gsk$Country: Latvia
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     0.0    19.0   855.3   435.0 60035.0 
## ------------------------------------------------------------ 
## country_gsk$Country: Liechtenstein
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     0.0     0.0    30.5     4.0  1974.0 
## ------------------------------------------------------------ 
## country_gsk$Country: Lithuania
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0      10    1379     555   80214 
## ------------------------------------------------------------ 
## country_gsk$Country: Luxembourg
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     0.0     2.0   325.3    70.0 23738.0 
## ------------------------------------------------------------ 
## country_gsk$Country: Malta
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##     0.00     0.00     1.00   345.97    42.25 22554.00 
## ------------------------------------------------------------ 
## country_gsk$Country: Netherlands
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       2   21353    3035  828423 
## ------------------------------------------------------------ 
## country_gsk$Country: Norway
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       5      57    4611     865  313863 
## ------------------------------------------------------------ 
## country_gsk$Country: Poland
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       1    1447     142 1006740 
## ------------------------------------------------------------ 
## country_gsk$Country: Portugal
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       2      53    5952    1737  384115 
## ------------------------------------------------------------ 
## country_gsk$Country: Romania
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0      13     331   11550    4511 1238654 
## ------------------------------------------------------------ 
## country_gsk$Country: Slovakia
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0      13     318    3235    1802  144503 
## ------------------------------------------------------------ 
## country_gsk$Country: Slovenia
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     5.0    90.0  1459.4   851.8 46914.0 
## ------------------------------------------------------------ 
## country_gsk$Country: Spain
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0      23   22391    2529 1512322 
## ------------------------------------------------------------ 
## country_gsk$Country: Sweden
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0      46    5381    2048  369575
# Distribution of the per-record second-dose counts within each country
by(
  data = country_gsk$SecondDose,
  INDICES = country_gsk$Country,
  FUN = summary
)
## country_gsk$Country: Austria
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       0    2746      70  288205 
## ------------------------------------------------------------ 
## country_gsk$Country: Belgium
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       9    6569     836  515430 
## ------------------------------------------------------------ 
## country_gsk$Country: Bulgaria
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     0.0    15.0  1280.3   622.8 62541.0 
## ------------------------------------------------------------ 
## country_gsk$Country: Croatia
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##      0.0      0.0      1.0    915.2     90.0 142011.0 
## ------------------------------------------------------------ 
## country_gsk$Country: Cyprus
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     0.0     7.0   554.1   151.0 34486.0 
## ------------------------------------------------------------ 
## country_gsk$Country: Czechia
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0     114    5348    1974  399364 
## ------------------------------------------------------------ 
## country_gsk$Country: Denmark
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##      0.0      0.0     10.0   3892.3    459.5 275901.0 
## ------------------------------------------------------------ 
## country_gsk$Country: Estonia
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0      16     683     295   34122 
## ------------------------------------------------------------ 
## country_gsk$Country: Finland
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##      0.0      0.0      0.0    255.1      1.0 186999.0 
## ------------------------------------------------------------ 
## country_gsk$Country: France
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0     172    1737   19529    9489 2217706 
## ------------------------------------------------------------ 
## country_gsk$Country: Germany
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0    1188   98405  337404  366090 3067087 
## ------------------------------------------------------------ 
## country_gsk$Country: Greece
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       0    4110     126  311581 
## ------------------------------------------------------------ 
## country_gsk$Country: Hungary
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0      27    3288     614  283256 
## ------------------------------------------------------------ 
## country_gsk$Country: Iceland
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     0.0     0.0   167.6     5.0 26499.0 
## ------------------------------------------------------------ 
## country_gsk$Country: Ireland
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##      0.0      0.0      8.0   3510.2    529.5 146253.0 
## ------------------------------------------------------------ 
## country_gsk$Country: Italy
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##       0.0       0.0     774.5   19698.0    8614.0 2276068.0 
## ------------------------------------------------------------ 
## country_gsk$Country: Latvia
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     0.0     2.0   655.1   206.5 60908.0 
## ------------------------------------------------------------ 
## country_gsk$Country: Liechtenstein
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00    0.00   30.87    5.00 1919.00 
## ------------------------------------------------------------ 
## country_gsk$Country: Lithuania
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       0    1148     267   70575 
## ------------------------------------------------------------ 
## country_gsk$Country: Luxembourg
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##     0.00     0.00     0.00   290.00    36.25 23722.00 
## ------------------------------------------------------------ 
## country_gsk$Country: Malta
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     0.0     0.0   315.9    24.0 23238.0 
## ------------------------------------------------------------ 
## country_gsk$Country: Netherlands
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##      0.0      0.0      0.0  18238.7    141.2 755671.0 
## ------------------------------------------------------------ 
## country_gsk$Country: Norway
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       1      37    4382     895  275825 
## ------------------------------------------------------------ 
## country_gsk$Country: Poland
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       0    1252      26  978926 
## ------------------------------------------------------------ 
## country_gsk$Country: Portugal
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       8    4718     784  340875 
## ------------------------------------------------------------ 
## country_gsk$Country: Romania
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0      60    8548    1912 1005262 
## ------------------------------------------------------------ 
## country_gsk$Country: Slovakia
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0     111    2975    1468  129452 
## ------------------------------------------------------------ 
## country_gsk$Country: Slovenia
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     0.0     1.0    46.0  1260.7   626.5 42148.0 
## ------------------------------------------------------------ 
## country_gsk$Country: Spain
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       1   18826    1061 1379634 
## ------------------------------------------------------------ 
## country_gsk$Country: Sweden
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0      11    5253    2074  352495
# Distribution of the running first-dose totals within each country
by(
  data = country_gsk$cum_FirstDoses,
  INDICES = country_gsk$Country,
  FUN = summary
)
## country_gsk$Country: Austria
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        0  3535564 10846012  8546158 12687807 13613912 
## ------------------------------------------------------------ 
## country_gsk$Country: Belgium
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        0  8485644 16933347 13229333 17680926 18102150 
## ------------------------------------------------------------ 
## country_gsk$Country: Bulgaria
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0 1526250 2574466 2484329 3840577 4206621 
## ------------------------------------------------------------ 
## country_gsk$Country: Croatia
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      54 1351331 3463370 2926670 4370493 4672366 
## ------------------------------------------------------------ 
## country_gsk$Country: Cyprus
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     369  860132 1197626  990939 1284516 1326865 
## ------------------------------------------------------------ 
## country_gsk$Country: Czechia
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##     1268  5933282 12172920  9803416 13649680 14368980 
## ------------------------------------------------------------ 
## country_gsk$Country: Denmark
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       2 2920713 8709146 6435332 9179238 9486039 
## ------------------------------------------------------------ 
## country_gsk$Country: Estonia
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     191  815202 1381946 1156786 1608709 1648340 
## ------------------------------------------------------------ 
## country_gsk$Country: Finland
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        0  6915088 22013992 17135512 25387592 26790924 
## ------------------------------------------------------------ 
## country_gsk$Country: France
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##         0  51630217 174879219 132880196 200911760 209552173 
## ------------------------------------------------------------ 
## country_gsk$Country: Germany
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##     15280  50097568 102855707  81677718 113815285 120776846 
## ------------------------------------------------------------ 
## country_gsk$Country: Greece
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        1  4787446 17464415 13976129 21090572 23860622 
## ------------------------------------------------------------ 
## country_gsk$Country: Hungary
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##     1095  9877608 11400708  9884901 12006423 12548921 
## ------------------------------------------------------------ 
## country_gsk$Country: Iceland
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    4943  341086  564056  451463  580088  604268 
## ------------------------------------------------------------ 
## country_gsk$Country: Ireland
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0 2397106 6764781 5426079 8013278 8426329 
## ------------------------------------------------------------ 
## country_gsk$Country: Italy
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##       510  47146520 115258814  95626620 140709832 151405965 
## ------------------------------------------------------------ 
## country_gsk$Country: Latvia
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0  831293 1648962 1561026 2506981 2634323 
## ------------------------------------------------------------ 
## country_gsk$Country: Liechtenstein
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0   19684   42646   35147   49613   51248 
## ------------------------------------------------------------ 
## country_gsk$Country: Lithuania
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1709 2968093 5096021 4145516 5695584 5839062 
## ------------------------------------------------------------ 
## country_gsk$Country: Luxembourg
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0  397879  788062  631632  871255  913337 
## ------------------------------------------------------------ 
## country_gsk$Country: Malta
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      57  581864  820514  669447  845526  875987 
## ------------------------------------------------------------ 
## country_gsk$Country: Netherlands
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        0  2654044 12185298  8676940 13172544 13452121 
## ------------------------------------------------------------ 
## country_gsk$Country: Norway
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      68 2686410 7600808 5636342 8434220 8600028 
## ------------------------------------------------------------ 
## country_gsk$Country: Poland
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        0 29839227 75697417 59042537 83591796 87951410 
## ------------------------------------------------------------ 
## country_gsk$Country: Portugal
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        1  6302537 21119240 17128505 26715735 28051011 
## ------------------------------------------------------------ 
## country_gsk$Country: Romania
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        0 14632156 16731103 17614096 24027701 24729124 
## ------------------------------------------------------------ 
## country_gsk$Country: Slovakia
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0 3403157 4632169 3976987 5185236 5518906 
## ------------------------------------------------------------ 
## country_gsk$Country: Slovenia
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    4929 1063740 1892978 1655027 2344212 2469237 
## ------------------------------------------------------------ 
## country_gsk$Country: Spain
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        0 21313474 69345971 53133420 79224850 84637228 
## ------------------------------------------------------------ 
## country_gsk$Country: Sweden
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        0  4487336 19332779 14175652 21581337 22217442
# Distribution of the running second-dose totals within each country
by(
  data = country_gsk$cum_SecondDoses,
  INDICES = country_gsk$Country,
  FUN = summary
)
## country_gsk$Country: Austria
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        0  1469910  9361981  6866137 10983598 12456006 
## ------------------------------------------------------------ 
## country_gsk$Country: Belgium
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        0  3226107 15580523 11024056 16587317 17045724 
## ------------------------------------------------------------ 
## country_gsk$Country: Bulgaria
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0  973637 2089475 1825964 2775747 3126595 
## ------------------------------------------------------------ 
## country_gsk$Country: Croatia
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      10  439051 3045238 2305148 3563967 4118269 
## ------------------------------------------------------------ 
## country_gsk$Country: Cyprus
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0  562066 1093466  862242 1194055 1267684 
## ------------------------------------------------------------ 
## country_gsk$Country: Czechia
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        0  2438339 10926587  8121878 12333708 13342848 
## ------------------------------------------------------------ 
## country_gsk$Country: Denmark
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0 1365295 7610941 5603357 8926170 9353098 
## ------------------------------------------------------------ 
## country_gsk$Country: Estonia
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       3  399212 1149289  939825 1418594 1483397 
## ------------------------------------------------------------ 
## country_gsk$Country: Finland
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        0   600918 11721948 12319337 23829046 25472560 
## ------------------------------------------------------------ 
## country_gsk$Country: France
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##         0  17845829 128999519 106614240 176360069 203202213 
## ------------------------------------------------------------ 
## country_gsk$Country: Germany
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##         9  14343533  92473699  68716405 108219158 118428969 
## ------------------------------------------------------------ 
## country_gsk$Country: Greece
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        0  2403493 14834681 11483226 18416581 21365553 
## ------------------------------------------------------------ 
## country_gsk$Country: Hungary
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        0  5840922 10645136  8450470 11153613 11668925 
## ------------------------------------------------------------ 
## country_gsk$Country: Iceland
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0  148756  443228  330286  463574  482284 
## ------------------------------------------------------------ 
## country_gsk$Country: Ireland
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0 1023496 5268144 4388379 7408322 7803185 
## ------------------------------------------------------------ 
## country_gsk$Country: Italy
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##         0  20238998  93171868  72679943 119334144 128076397 
## ------------------------------------------------------------ 
## country_gsk$Country: Latvia
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0  264105 1303206 1133466 1843502 2017604 
## ------------------------------------------------------------ 
## country_gsk$Country: Liechtenstein
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0   11848   41047   32588   49413   51861 
## ------------------------------------------------------------ 
## country_gsk$Country: Lithuania
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0 1668151 4149219 3247694 4647502 4860520 
## ------------------------------------------------------------ 
## country_gsk$Country: Luxembourg
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0  171424  690073  508542  759521  814319 
## ------------------------------------------------------------ 
## country_gsk$Country: Malta
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0  264510  747364  559273  771674  799935 
## ------------------------------------------------------------ 
## country_gsk$Country: Netherlands
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        0   914281  8499265  6467141 11077512 11490383 
## ------------------------------------------------------------ 
## country_gsk$Country: Norway
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0  703070 4025720 4456456 7787429 8171771 
## ------------------------------------------------------------ 
## country_gsk$Country: Poland
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        0 10582232 66842914 47336315 71828887 76066646 
## ------------------------------------------------------------ 
## country_gsk$Country: Portugal
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        0  2171383 14842889 12359960 21322574 22236866 
## ------------------------------------------------------------ 
## country_gsk$Country: Romania
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        0 13120952 14038750 13647452 17616590 18300904 
## ------------------------------------------------------------ 
## country_gsk$Country: Slovakia
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0 1711154 4268918 3365103 4685641 5075682 
## ------------------------------------------------------------ 
## country_gsk$Country: Slovenia
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0  555251 1529078 1283901 1941780 2133074 
## ------------------------------------------------------------ 
## country_gsk$Country: Spain
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        0  8641249 53932113 41620085 68704961 71160465 
## ------------------------------------------------------------ 
## country_gsk$Country: Sweden
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##        0  1888769 12883071 11714568 20753721 21691381
# Distribution of the per-record second-dose counts within each of the
# combined target groups (Groups)
by(
  data = country_gsk$SecondDose,
  INDICES = country_gsk$Groups,
  FUN = summary
)
## country_gsk$Groups: 18-59
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       5    7299     751 3067087 
## ------------------------------------------------------------ 
## country_gsk$Groups: 60+
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       1    2248     142 1705272 
## ------------------------------------------------------------ 
## country_gsk$Groups: Healthcare workers
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##      0.0      0.0      1.0    642.7     57.0 337716.0 
## ------------------------------------------------------------ 
## country_gsk$Groups: LTC residencts
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##      0.0      0.0      0.0    294.2     12.0 122351.0 
## ------------------------------------------------------------ 
## country_gsk$Groups: Not known
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
##    0.000    0.000    0.000    8.661    0.000 4485.000 
## ------------------------------------------------------------ 
## country_gsk$Groups: Under 18
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       0     419       0  451055
# Keep only the Italian records and the columns needed for the
# vaccination-progress plot
gsk_ItalyData <- country_gsk %>%
  filter(Country %in% "Italy") %>%
  select(
    YearWeekISO, Date, Population,
    FirstDose, SecondDose, Vaccine, Groups
  )
## Adding missing grouping variables: `Country`
# Vaccination progress for Italy: cumulative share of the population that
# has received a first / second dose, one row per week and dose type.
#
# FirstDose and SecondDose appear to be counts for a single ISO week (one row
# per week, vaccine and target group), so they are totalled per week and then
# accumulated over time. Summing within a week only, as was done before, gives
# the share vaccinated in that week, repeated once per source row.
#
# NOTE(review): Population is taken from the first row of each week on the
# assumption that it is the country total repeated on every row - confirm.
# NOTE(review): TargetGroup "ALL" is folded into Groups upstream; if those rows
# overlap the age-specific rows, the totals below double count - confirm and
# filter the input if needed.
vaccine_gsk <- gsk_ItalyData %>%
  ungroup() %>%
  group_by(Date) %>%
  summarise(
    FirstDose = sum(FirstDose, na.rm = TRUE),
    SecondDose = sum(SecondDose, na.rm = TRUE),
    Population = first(Population),
    .groups = "drop"
  ) %>%
  # cumsum() below relies on chronological order
  arrange(Date) %>%
  mutate(
    `First dose` = round(cumsum(FirstDose) / Population * 100, 3),
    `Fully vaccinated` = round(cumsum(SecondDose) / Population * 100, 3)
  ) %>%
  select(Date, `First dose`, `Fully vaccinated`) %>%
  # long format: one row per (Date, dose type) for the fill aesthetic
  pivot_longer(
    cols = -Date,
    names_to = "doses",
    values_to = "Population share"
  ) %>%
  mutate(doses = factor(doses))

# Area chart of the first-dose and fully-vaccinated population shares over
# time for Italy, built from vaccine_gsk.
vac_gsk <- ggplot(
  data = vaccine_gsk,
  aes(x = Date, y = `Population share`, fill = doses)
) +
  # "identity" overlays the two series instead of stacking them
  geom_area(position = "identity") +
  theme_classic() +
  # The ColorBrewer sequential green palette is called "Greens"; the previous
  # value "Green" is not a palette name and raised "Unknown palette Green".
  scale_fill_brewer(palette = "Greens") +
  scale_x_date(date_breaks = "4 month") +
  labs(title = "Covid-19 vaccination progress in Italy ",
       x = "Date", y = "Share of Population(%)",
       fill = NULL, caption = "Data: GSK") +
  theme(plot.title = element_text(hjust = -0.25), legend.position = "bottom")

# Interactive version of the chart with a horizontal legend centred below it
ggplotly(vac_gsk) %>%
  layout(
    hovermode = "x",
    legend = list(orientation = "h", xanchor = "center", x = 0.5, y = -0.1)
  )

The above results for vaccination progress in Italy, based on the data provided, do not seem right. I wanted to check the vaccination progress for Italy using the COVID-19 data available online, and I got the results below, which seem to be correct.

# Download the Our World in Data COVID-19 table and keep the Italian rows with
# the columns needed for the vaccination-progress comparison
Italy <- read_csv(
  file = "https://covid.ourworldindata.org/data/owid-covid-data.csv"
) %>%
  filter(location == "Italy") %>%
  select(
    date,
    population,
    people_vaccinated,
    people_fully_vaccinated
  )
## Rows: 169939 Columns: 67
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (4): iso_code, continent, location, tests_units
## dbl  (62): total_cases, new_cases, new_cases_smoothed, total_deaths, new_dea...
## date  (1): date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Vaccination progress

# Rows where a first-dose figure is reported but the fully-vaccinated figure
# is missing (expected at the start of the vaccination campaign)
rows <- which(
  !is.na(Italy$people_vaccinated) & is.na(Italy$people_fully_vaccinated)
)
# Treat those missing fully-vaccinated values as zero
Italy$people_fully_vaccinated[rows] <- 0

# Percentage of the population with a first dose / fully vaccinated per day,
# reshaped to long format (one row per date and dose type) for plotting
vaccine <- Italy %>%
  transmute(
    date,
    `First dose` = round(people_vaccinated / population * 100, 3),
    `Fully vaccinated` = round(people_fully_vaccinated / population * 100, 3)
  ) %>%
  # days without a first-dose figure are not plotted
  filter(!is.na(`First dose`)) %>%
  gather(key = "doses", value = "Population share", -date) %>%
  mutate(doses = factor(doses))

# Area chart of the first-dose and fully-vaccinated population shares over
# time for Italy, built from the OWID-based `vaccine` table.
vac <- ggplot(
  data = vaccine,
  aes(x = date, y = `Population share`, fill = doses)
) +
  # "identity" overlays the two series instead of stacking them
  geom_area(position = "identity") +
  theme_classic() +
  # The ColorBrewer sequential green palette is called "Greens"; the previous
  # value "Green" is not a palette name and raised "Unknown palette Green".
  scale_fill_brewer(palette = "Greens") +
  scale_x_date(date_breaks = "2 month") +
  labs(title = "Covid-19 vaccination progress in Italy ",
       x = "Date", y = "Share of Population(%)",
       fill = NULL, caption = "Data: OWID") +
  theme(plot.title = element_text(hjust = -0.25), legend.position = "bottom")

# Interactive version of the chart with a horizontal legend centred below it
ggplotly(vac) %>%
  layout(
    hovermode = "x",
    legend = list(orientation = "h", xanchor = "center", x = 0.5, y = -0.1)
  )
# Logistic regression: is the combined target group associated with a record
# reporting any first doses?
# Two indicators are derived per record:
#   vaccinated - 1 when FirstDose > 0, else 0 (stored as a factor, the outcome)
#   fullyvaxed - 1 when SecondDose > 0, else 0
kk <- gsk_data %>%
  mutate(
    vaccinated = factor(ifelse(FirstDose > 0, 1, 0)),
    fullyvaxed = ifelse(SecondDose > 0, 1, 0)
  )

# Binomial GLM of the first-dose indicator on Groups
fit1 <- glm(
  formula = vaccinated ~ Groups,
  family = binomial,
  data = kk
)
summary(fit1)
## 
## Call:
## glm(formula = vaccinated ~ Groups, family = binomial, data = kk)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5383  -0.7436   0.8552   0.9185   1.7858  
## 
## Coefficients:
##                           Estimate Std. Error  z value Pr(>|z|)    
## (Intercept)               0.817494   0.007253  112.706  < 2e-16 ***
## Groups60+                -0.359183   0.011132  -32.267  < 2e-16 ***
## GroupsHealthcare workers -0.172567   0.023293   -7.409 1.28e-13 ***
## GroupsLTC residencts     -0.927008   0.033671  -27.532  < 2e-16 ***
## GroupsNot known          -2.185069   0.021262 -102.768  < 2e-16 ***
## GroupsUnder 18           -1.961626   0.011270 -174.059  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 346722  on 250141  degrees of freedom
## Residual deviance: 302640  on 250136  degrees of freedom
## AIC: 302652
## 
## Number of Fisher Scoring iterations: 4
# Odds ratios: exponentiated model coefficients
exp(coef(fit1))
##              (Intercept)                Groups60+ GroupsHealthcare workers 
##                2.2648175                0.6982463                0.8415019 
##     GroupsLTC residencts          GroupsNot known           GroupsUnder 18 
##                0.3957360                0.1124699                0.1406296
# 95% confidence intervals on the odds-ratio scale
exp(confint(fit1))
## Waiting for profiling to be done...
##                              2.5 %    97.5 %
## (Intercept)              2.2328771 2.2972749
## Groups60+                0.6831774 0.7136481
## GroupsHealthcare workers 0.8040174 0.8808876
## GroupsLTC residencts     0.3704475 0.4227197
## GroupsNot known          0.1078645 0.1172401
## GroupsUnder 18           0.1375546 0.1437676
library(broom)

# Tidy coefficient table with confidence intervals, reported as odds ratios.
# The argument is spelled out as `exponentiate`; the earlier `exp = TRUE`
# only worked through partial argument matching.
fit1 %>%
  tidy(conf.int = TRUE, exponentiate = TRUE)
## # A tibble: 6 × 7
##   term                 estimate std.error statistic   p.value conf.low conf.high
##   <chr>                   <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 (Intercept)             2.26    0.00725    113.   0            2.23      2.30 
## 2 Groups60+               0.698   0.0111     -32.3  2.05e-228    0.683     0.714
## 3 GroupsHealthcare wo…    0.842   0.0233      -7.41 1.28e- 13    0.804     0.881
## 4 GroupsLTC residencts    0.396   0.0337     -27.5  7.37e-167    0.370     0.423
## 5 GroupsNot known         0.112   0.0213    -103.   0            0.108     0.117
## 6 GroupsUnder 18          0.141   0.0113    -174.   0            0.138     0.144
# I do not think this is a very useful model, because I did not see any other variables in the data that I could think of using in a model.
# However, the aim of this model was to explore whether there was any association between age group and getting vaccinated for the first time (first dose).
# Here the reference group is the 18-59 age group and the outcome variable is getting vaccinated (first dose, Y/N).